
* Start from a clean session: drop any data and other objects left in memory
* before the extracts below are imported.
clear all

// Creates a 2000 and 2010 census block-level dataset for the 50 states: population, voting age population, citizen voting age population, race, gender

/*
Census block-level data for population, voting age population, citizen voting age population, race, and gender were downloaded from NHGIS:

 Steven Manson, Jonathan Schroeder, David Van Riper, Katherine Knowles, Tracy Kugler, Finn Roberts, and Steven Ruggles. IPUMS National Historical Geographic Information System: Version 18.0 [dataset]. Minneapolis, MN: IPUMS. 2023. http://doi.org/10.18128/D050.V18.0


// download the following tables at the block-level: 



--------------------------------------------------------------------------------
Data Summary
--------------------------------------------------------------------------------
Year: 2000
Geographic level: Block (by State--County--Census Tract)
Dataset:          2000 Census: SF 1b - 100% Data [Blocks & Block Groups]
   NHGIS code:    2000_SF1b
   NHGIS ID:      ds147
 
Tables:
 
1. Total Population
   Universe:    Persons
   Source code: NP001A
   NHGIS code:  FXS
 
2. Population by Urban and Rural
   Universe:    Persons
   Source code: NP002A
   NHGIS code:  FXT
 
3. Population by Hispanic or Latino and Not Hispanic or Latino by Race
   Universe:    Persons
   Source code: NP008A
   NHGIS code:  FYF
 
4. Population by Sex by Age
   Universe:    Persons
   Source code: NP012B
   NHGIS code:  FYM


Year: 2010
Geographic level: Block (by State--County--Census Tract)
Dataset:          2010 Census: SF 1a - P & H Tables [Blocks & Larger Areas]
   NHGIS code:    2010_SF1a
   NHGIS ID:      ds172
Breakdown(s):     Geographic Subarea:
                     Total area (00)
 
Tables:
 
1. Total Population
   Universe:    Total population
   Source code: P1
   NHGIS code:  H7V
 
2. Urban and Rural
   Universe:    Total population
   Source code: P2
   NHGIS code:  H7W
 
3. Hispanic or Latino Origin by Race
   Universe:    Total population
   Source code: P5
   NHGIS code:  H7Z
 
4. Sex by Age
   Universe:    Total population
   Source code: P12
   NHGIS code:  H76


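// download the following tables at the tract-level (unavailable at smaller units).
// NOTE: the code visible in this file does not read these tract-level tables; it takes CVAP from the
// block-group files "BlockGr.csv" under $DataPath (CVAP 2005-2009, 2010-2014, and 2015-2019 ACS folders).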

--------------------------------------------------------------------------------
Data Summary
--------------------------------------------------------------------------------
 
Year:             2008-2012
Geographic level: Census Tract (by State--County)
Dataset:          2012 American Community Survey: 5-Year Data [2008-2012, Tracts & Larger Areas]
   NHGIS code:    2008_2012_ACS5b
   NHGIS ID:      ds192
Breakdown(s):     Geographic Component:
                     Total area (00)
Data type(s):     (E) Estimates
                  (M) Margins of error
 
Tables:
 
1. Sex by Age by Nativity and Citizenship Status
   Universe:    Total population
   Source code: B05003
   NHGIS code:  Q2Z   
   
  
Year:             2005-2009
Geographic level: Census Tract (by State--County)
Dataset:          2009 American Community Survey: 5-Year Data [2005-2009, Tracts & Larger Areas]
   NHGIS code:    2005_2009_ACS5b
   NHGIS ID:      ds196
Breakdown(s):     Geographic Component:
                     Total area (00)
Data type(s):     (E) Estimates
                  (M) Margins of error
 
Tables:
 
1. Sex by Age by Citizenship Status
   Universe:    Total population
   Source code: B05003
   NHGIS code:  RU0
   

Year:             2013-2017
Geographic level: Census Tract (by State--County)
Dataset:          2017 American Community Survey: 5-Year Data [2013-2017, Tracts & Larger Areas]
   NHGIS code:    2013_2017_ACS5b
   NHGIS ID:      ds234
Breakdown(s):     Geographic Component:
                     Total area (00)
Data type(s):     (E) Estimates
                  (M) Margins of error
 
Tables:
 
1. Sex by Age by Nativity and Citizenship Status
   Universe:    Total population
   Source code: B05003
   NHGIS code:  AH8Y
   
*/

* 2000 block-level data (NHGIS extract ds147).
* Builds one row per census block with population, urban/rural population,
* race/ethnicity, sex, and voting-age population, tags it as census year 2000 /
* analysis year 2007, and stores it in the tempfile named base.
import delimited "$RawDataPath/nhgis0025_csv/nhgis0025_ds147_2000_block.csv", clear

* Harmonise the geographic identifiers with the names used for the 2010 extract.
rename (statea countya tracta blck_grpa blocka) (fstate fcounty ftract fblockgroup fblock)

* Total population and rural population.
rename (fxs001 fxt003) (population rural)

* Urban population.
* NOTE(review): the 2000 NHGIS tables used here carry only detail columns (no
* totals), so fxt001 should be "inside urbanized areas" and fxt002 "inside urban
* clusters" - confirm against the ds147 codebook. Total urban is their sum, which
* is what the 2010 measure (h7w002) reports; using fxt001 alone would leave out
* the urban-cluster population.
egen urban = rowtotal(fxt001 fxt002), missing

* Race / ethnicity. With the missing option a row total is missing only when
* every component is missing.
* hispanic = sum of fyf008 through fyf014 (range follows the file's column order).
* white / black = the non-Hispanic column plus the matching Hispanic column;
* the non-Hispanic columns themselves are kept as whitenh / blacknh.
egen hispanic = rowtotal(fyf008-fyf014), missing
egen white    = rowtotal(fyf001 fyf008), missing
rename fyf001 whitenh
egen black    = rowtotal(fyf002 fyf009), missing
rename fyf002 blacknh

* Sex by age: columns 001-023 are summed as male, 024-046 as female.
* Voting-age population skips the first four age columns of each sex
* (presumably the under-18 age groups - verify against the codebook).
egen male   = rowtotal(fym001-fym023), missing
egen female = rowtotal(fym024-fym046), missing
egen vap    = rowtotal(fym005-fym023 fym028-fym046), missing //voting age population

keep gisjoin state fstate fcounty ftract fblockgroup fblock population urban rural hispanic white whitenh black blacknh male female vap

* censusyear records the source census; year is the analysis year these counts
* are attached to (2000 census counts are used for 2007).
gen censusyear=2000
gen year=2007

tempfile base
save `base', replace


* 2010 block-level data (NHGIS extract ds172).
* Builds the same block-level variables as the 2000 extract, then stacks the
* 2010 counts twice (as analysis years 2012 and 2017) on top of the 2000-based
* rows held in the tempfile named base. The stacked result is saved in the
* tempfile named blocks and left in memory.
import delimited "$RawDataPath/nhgis0026_csv/nhgis0026_ds172_2010_block.csv", clear

* Harmonise the geographic identifiers with the names used for the 2000 extract.
rename (statea countya tracta blkgrpa blocka) (fstate fcounty ftract fblockgroup fblock)

* Total population, urban population, rural population.
rename (h7v001 h7w002 h7w005) (population urban rural)

* Race / ethnicity: white and black add the non-Hispanic and Hispanic column
* for that race; the non-Hispanic columns are kept as whitenh / blacknh.
rename h7z010 hispanic
egen white = rowtotal(h7z003 h7z011), missing
rename h7z003 whitenh
egen black = rowtotal(h7z004 h7z012), missing
* Keep the non-Hispanic Black count under the same name the 2000 extract uses.
* Without this, blacknh would be missing for every 2012 and 2017 row after the
* append below.
rename h7z004 blacknh

* Sex totals and voting-age population (age columns h76007-h76025 for males,
* h76031-h76049 for females).
rename (h76002 h76026) (male female)
egen vap = rowtotal(h76007-h76025 h76031-h76049), missing

keep gisjoin state fstate fcounty ftract fblockgroup fblock population urban rural hispanic white whitenh black blacknh male female vap

gen censusyear=2010

* Park the clean 2010 extract so it can be reused for both analysis years.
tempfile censusyear2010
save `censusyear2010'

* use 2010 census data for 2012 and 2017
gen year=2012

append using `base'
save `base', replace

use `censusyear2010', clear

gen year=2017

append using `base'

tempfile blocks
save `blocks', replace


* Block-group-level CVAP (citizen voting-age population) from three 5-year ACS
* windows. Each window is tagged with an analysis year:
*   2005-2009 -> 2007, 2010-2014 -> 2012, 2015-2019 -> 2017
* The three windows are stacked (latest window first) and left in memory with
* numeric geographic identifiers.
local windows  2005-2009 2010-2014 2015-2019
local midyears 2007 2012 2017

tempfile base

forvalues i = 1/3 {
    local window  : word `i' of `windows'
    local midyear : word `i' of `midyears'

    import delimited "$DataPath/CVAP_`window'_ACS_csv_files/BlockGr.csv", clear

    // only the rows labelled "Total" are used
    keep if lntitle == "Total"

    // state, county, tract and block-group codes sit at fixed positions in geoid
    generate fstate      = substr(geoid,  8, 2)
    generate fcounty     = substr(geoid, 10, 3)
    generate ftract      = substr(geoid, 13, 6)
    generate fblockgroup = substr(geoid, 19, 1)

    rename cvap_est cvap
    keep fstate fcounty ftract fblockgroup cvap

    generate year = `midyear'

    // stack the windows processed so far underneath the current one
    if `i' > 1 {
        append using `base'
    }
    // the running stack only needs to be stored while more windows remain
    if `i' < 3 {
        save `base', replace
    }
}

// identifiers were cut out of a string; convert them to numbers
destring fstate fcounty ftract fblockgroup, replace


* Attach each block group's CVAP estimate to every census block inside it.
* Block groups that appear only in the CVAP data are discarded (per the original
* note: DC and PR, and 137 tracts from ACS with no block id from Census); blocks
* without a CVAP record are retained.
merge 1:m year fstate fcounty ftract fblockgroup using `blocks', keep(match using) nogenerate

* Each block's share of its block group's census population.
* The share is missing when the block group's total population is zero.
egen bg_pop = total(population), by(year fstate fcounty ftract fblockgroup)
generate bg_pop_share = population / bg_pop

* Disaggregate block-group-level CVAP to the block level in proportion to the
* population share.
rename cvap cvap_blockgroup
generate cvap = cvap_blockgroup * bg_pop_share

* Remove the working variables.
drop cvap_blockgroup bg_pop bg_pop_share

* Write the final block-level file to the output directory.
cd "$OutputPath/"
save census_pop_block, replace

